This document is a lab-book for visual data minining carried out on the CHUSJ dataset.
# Read dataset from SPSS file
BD_chusj <- haven::read_sav(file.path("data", "BD_chusj_MAIN.sav"))
BD_chusj <- BD_chusj %>% zap_formats %>% zap_label %>% zap_widths
hist(BD_chusj$Age, col = "azure3",
main = "Histogram of Age",
xlab = "Age" )
# Add boxplot
par(new = TRUE)
boxplot(BD_chusj$Age,
horizontal = TRUE,
axes = FALSE,
boxwex = 0.25,
col = rgb(0.81, 0.85, 0.9, alpha = 0.5))
summary(BD_chusj$Age) %>% print()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 55.00 65.00 62.81 80.00 95.00
Variable Date Of First Positive Lab Result presented with different granularities: Daily, Weekly and Monthly.
FirstDate <-
BD_chusj %>%
select(DateOfFirstPositiveLabResult) %>%
group_by(date = DateOfFirstPositiveLabResult) %>%
summarise(freq = n())
FirstDate_daily <- xts::as.xts(FirstDate$freq, order.by = as.Date(FirstDate$date))
FirstDate_weekly <- xts::apply.weekly(FirstDate_daily, sum)
FirstDate_monthly <- xts::apply.monthly(FirstDate_daily, sum)
par(mfrow = c(3,1))
plot(FirstDate_daily, col = "cadetblue4", lwd = 3,
main = "Date of first positive lab result - daily")
plot(FirstDate_weekly, col = "cadetblue4", lwd = 3,
main = "Date of first positive lab result - weekly")
plot(FirstDate_monthly, col = "cadetblue4", lwd = 3,
main = "Date of first positive lab result - monhly")
par(mfrow = c(1,1))
# Clear aux variables
rm(list = ls()[grep("^FirstDate", ls())])
Count of comorbidities (total n = 2688 patient).
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- sapply(comorbidities, sum) %>% as.data.frame()
comorbidities <- data.frame(comorbidities = rownames(comorbidities),
count = comorbidities[,1])
ggplot(data = comorbidities, aes(x = reorder(comorbidities, count), y = count)) +
geom_bar(stat = 'identity', color = "cadetblue4", fill = "azure3", width = 0.85) +
geom_text(aes(label = count), hjust = -0.3, color = "slategray4", size = 3.5) +
coord_flip() +
ggtitle(label = "Comorbidities",
subtitle = "Frequency in 2688 patients") +
labs(x = "", y = "")
rm(comorbidities)
Relative frequency of comorbidities among patient (total n = 2688 patients)
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- sapply(comorbidities, sum) %>% t() %>% as.data.frame()
comorbidities <- comorbidities / nrow(BD_chusj) # convert to percentage
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(comorbidities), 0.1, f = ceiling)
# Prepare data for radar chart
comorbidities <- rbind(max = rep(max.axis, length(comorbidities)),
min = rep(0, length(comorbidities)),
value = comorbidities)
radarchart(comorbidities,
axistype = 1,
# custom polygon
pcol = rgb(0.2, 0.5, 0.5, 0.9), # line color
pfcol = rgb(0.2, 0.5, 0.5, 0.2), # fill-in color
plwd = 2, # line width
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.9, # font size of net labels
vlcex = 0.8, # font size of outer labels
title = "Overall frequency of comorbidities (%)")
# Frequency table
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- sapply(comorbidities, sum) %>% as.data.frame()
names(comorbidities) <- "frequency"
comorbidities$percentage <- round(comorbidities$frequency / nrow(BD_chusj), 3) # convert to percentage
comorbidities %>% arrange(desc(percentage)) %>% rmarkdown::paged_table()
Count of ATC groups per Comorbidity (total n = 2688 patients). ATC groups are presented in decreasing order of frequency.
#' ATCs and Comorbidities
ATC_lv1 <- BD_chusj[, c(469, 378:390)]
comorbidities <- BD_chusj[,c(7:18, 23)]
ATCs_comorbidities <- cbind(ATC_lv1, comorbidities)
# Create an empty dataset
ATC.Comorbs_matrix <- data.frame(matrix(NA,
ncol = ncol(comorbidities),
nrow = ncol(ATC_lv1)))
colnames(ATC.Comorbs_matrix) <- names(comorbidities)
rownames(ATC.Comorbs_matrix) <- names(ATC_lv1)
# Compute matrix
for (atc in rownames(ATC.Comorbs_matrix)) {
for (comorb in colnames(ATC.Comorbs_matrix)) {
ATC.Comorbs_matrix[atc, comorb] <-
ATCs_comorbidities[ATCs_comorbidities[,atc] == 1 & ATCs_comorbidities[,comorb] == 1,
c(atc, comorb)] %>% nrow()
}
}
rm(atc, comorb, ATCs_comorbidities, comorbidities)
# Check Top-N ATC families
ATCs <- sapply(ATC_lv1, sum) %>% as.data.frame() %>% arrange(desc(.))
# Select Top-n ATC families
#ATCs <- ATCs %>% head(4)
# Prepare data for radar chart
radar.data <- ATC.Comorbs_matrix[rownames(ATCs), ]
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(radar.data), 50, f = ceiling)
radar.data <- rbind(max = rep(max.axis, ncol(radar.data)),
min = rep(0, ncol(radar.data)),
radar.data)
# Define colors
# colors <- RColorBrewer::brewer.pal(nrow(radar.data) - 2, "Set1")
colors <- c("firebrick3", "steelblue2", "springgreen3",
"darkorchid3", "darkorange2", "gold2",
"tomato4", "chartreuse4", "cyan3",
"wheat3", "deeppink2", "khaki2",
"royalblue2", "coral1")
colors_border <- colors
colors_in <- scales::alpha(colors, alpha = 0.2)
radarchart(radar.data,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = seq(0, max.axis, length.out = 5),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Frequency of top-4 ATC groups per Comorbidity (n)")
legend(x = -1.7,
y = 1,
legend = rownames(radar.data[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
# Frequency table
ATC.Comorbs_matrix[rownames(ATCs), ] %>% rmarkdown::paged_table()
Here, the frequencies are presented relatively to each comorbidity - ie. percentage of patients with Cancer using drugs of group ATC.B.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Version 2 - with relative frequencies (%)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- sapply(comorbidities, sum) %>% t() %>% as.data.frame()
ATC.Comorbs_matrix.t <- ATC.Comorbs_matrix %>% t() %>% as.data.frame()
ATC.Comorbs_matrix.perc <- sapply(ATC.Comorbs_matrix.t, function(i){round(i / comorbidities[1,], 3)})
# Prepare data for radar chart
radar.data <- matrix(unlist(ATC.Comorbs_matrix.perc),
ncol = length(comorbidities),
nrow = length(ATC_lv1),
byrow = TRUE) %>% as.data.frame()
colnames(radar.data) = names(comorbidities)
rownames(radar.data) = names(ATC_lv1)
radar.data <- rbind(max = rep(1, ncol(radar.data)),
min = rep(0, ncol(radar.data)),
radar.data[rownames(ATCs),])
# Define colors
# colors <- RColorBrewer::brewer.pal(nrow(radar.data) - 2, "Set1")
colors <- c("firebrick3", "steelblue2", "springgreen3",
"darkorchid3", "darkorange2", "gold2",
"tomato4", "chartreuse4", "cyan3",
"wheat3", "deeppink2", "khaki2",
"royalblue2", "coral1")
colors_border <- colors
colors_in <- scales::alpha(colors, alpha = 0.2)
radarchart(radar.data,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
# caxislabels = seq(0, max.axis, length.out = 5),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Frequency of ATC per Comorbidity (n)")
legend(x = -1.7, #1.1,
y = 1, #-0.65,
legend = rownames(radar.data[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
# Frequency table - Absolute frequencies
radar.data[-c(1,2),] %>% rmarkdown::paged_table()
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- cbind(Sex = BD_chusj$Gender,
comorbidities)
comorbidities <- rbind(
# total = comorbidities %>% sapply(., sum) %>% t() %>% as.data.frame(),
male = comorbidities %>% filter(Sex == 1) %>% sapply(., sum) %>% t() %>% as.data.frame(),
female = comorbidities %>% filter(Sex == 0) %>% sapply(., sum) %>% t() %>% as.data.frame())
comorbidities$Sex <- NULL
# convert to percentage
comorbidities <- comorbidities / nrow(BD_chusj)
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(comorbidities), 0.1, f = ceiling)
# Prepare data for radar chart
comorbidities <- rbind(max = rep(max.axis, length(comorbidities)),
min = rep(0, length(comorbidities)),
comorbidities)
# Define colors
# colors <- RColorBrewer::brewer.pal(3, "BuPu")
colors <- c("dodgerblue3", "hotpink1")
colors_border <- colors
colors_in <- c(scales::alpha(colors[1], 0.15), scales::alpha(colors[2], 0.15))
radarchart(comorbidities,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Frequency of Comorbidities per Sex (%)")
legend(x = 1.1,
y = -1,
legend = rownames(comorbidities[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
{
cat("Frequencies in percentage (%):\n")
(comorbidities[-c(1:2),] * 100) %>% round(., digits = 1) %>%
t() %>% as.data.frame() %>% print()
}
## Frequencies in percentage (%):
## male female
## CANC 17.2 14.0
## CEREBROVASCULAR 6.0 4.6
## DIAB 16.9 12.4
## KIDNEY 9.3 6.3
## LIVER 5.1 2.1
## LUNG_I 7.1 1.5
## LUNG_II 5.8 3.9
## HEART 11.7 7.6
## TRANSP 1.2 0.5
## OBESITY 8.9 10.8
## SMOKING 15.6 1.6
## NERVOUS 14.4 12.8
## PreconditionOther 8.3 4.4
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- cbind(IntensiveCare = BD_chusj$IntensiveCare,
comorbidities)
comorbidities <- rbind(
# total = comorbidities %>% sapply(., sum) %>% t() %>% as.data.frame(),
IC_no = comorbidities %>% filter(IntensiveCare == 0) %>% sapply(., sum) %>% t() %>% as.data.frame(),
IC_yes = comorbidities %>% filter(IntensiveCare == 1) %>% sapply(., sum) %>% t() %>% as.data.frame())
comorbidities$IntensiveCare <- NULL
# convert to percentage
comorbidities <- comorbidities / nrow(BD_chusj)
# comorbidities %>% t() %>% round(., digits = 3) %>% as.data.frame() %>% print()
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(comorbidities), 0.1, f = ceiling)
# Prepare data for radar chart
comorbidities <- rbind(max = rep(max.axis, length(comorbidities)),
min = rep(0, length(comorbidities)),
comorbidities)
# Define colors
# colors <- RColorBrewer::brewer.pal(3, "BuPu")
colors <- c("seagreen4", "indianred3")
colors_border <- colors
colors_in <- c(scales::alpha(colors[1], 0.3), scales::alpha(colors[2], 0.6))
radarchart(comorbidities,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Frequency of Comorbidities in Intensive Care(%)")
legend(x = 1.1,
y = -1,
legend = rownames(comorbidities[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
{
cat("Frequencies in percentage (%):\n")
(comorbidities[-c(1:2),] * 100) %>% round(., digits = 1) %>%
t() %>% as.data.frame() %>% print()
}
## Frequencies in percentage (%):
## IC_no IC_yes
## CANC 21.3 9.9
## CEREBROVASCULAR 8.1 2.5
## DIAB 19.6 9.7
## KIDNEY 11.2 4.4
## LIVER 4.1 3.0
## LUNG_I 5.7 2.9
## LUNG_II 3.5 6.2
## HEART 13.9 5.4
## TRANSP 0.9 0.7
## OBESITY 10.4 9.3
## SMOKING 9.9 7.3
## NERVOUS 20.5 6.7
## PreconditionOther 8.9 3.8
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- cbind(Outcome = BD_chusj$Outcome,
comorbidities)
comorbidities <- rbind(
# total = comorbidities %>% sapply(., sum) %>% t() %>% as.data.frame(),
Recovered = comorbidities %>% filter(Outcome == 0) %>% sapply(., sum) %>% t() %>% as.data.frame(),
Died = comorbidities %>% filter(Outcome == 1) %>% sapply(., sum) %>% t() %>% as.data.frame(),
OtherHospital = comorbidities %>% filter(Outcome == 2) %>% sapply(., sum) %>% t() %>% as.data.frame())
comorbidities$Outcome <- NULL
# convert to percentage
comorbidities <- comorbidities / nrow(BD_chusj)
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(comorbidities), 0.1, f = ceiling)
# Prepare data for radar chart
comorbidities <- rbind(max = rep(max.axis, length(comorbidities)),
min = rep(0, length(comorbidities)),
comorbidities)
# Define colors
# colors <- RColorBrewer::brewer.pal(3, "BuPu")
colors <- c("forestgreen", "firebrick3", "dodgerblue3")
colors_border <- colors
colors_in <- c(scales::alpha(colors[1], 0.15), scales::alpha(colors[2], 0.15))
radarchart(comorbidities,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Frequency of Comorbidities per Outcome (%)")
legend(x = 1.1,
y = -0.8,
legend = rownames(comorbidities[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
{
cat("Frequencies in percentage (%):\n")
(comorbidities[-c(1:2),] * 100) %>% round(., digits = 1) %>%
t() %>% as.data.frame() %>% print()
}
## Frequencies in percentage (%):
## Recovered Died OtherHospital
## CANC 19.6 8.5 3.0
## CEREBROVASCULAR 6.0 3.2 1.4
## DIAB 19.6 7.0 2.6
## KIDNEY 9.2 4.8 1.6
## LIVER 4.8 1.7 0.6
## LUNG_I 5.0 2.6 1.0
## LUNG_II 4.8 3.0 2.0
## HEART 11.0 6.0 2.2
## TRANSP 1.3 0.3 0.0
## OBESITY 12.9 3.6 3.1
## SMOKING 10.6 4.2 2.3
## NERVOUS 15.8 8.1 3.2
## PreconditionOther 8.8 2.2 1.5
#' ATC codes (first level) are listed in variables "469 + 378-390"
ATC_lv1 <- BD_chusj[, c(469, 378:390)]
# Create an empty dataset
heatmap.matrix <- data.frame(matrix(NA, ncol = length(ATC_lv1), nrow = length(ATC_lv1)))
colnames(heatmap.matrix) <- rownames(heatmap.matrix) <- names(ATC_lv1)
# Fill in dataset with count of co-occurences
for (i in 1:length(ATC_lv1)) {
for (j in i:length(ATC_lv1)) {
heatmap.matrix[i,j] <- heatmap.matrix[j,i] <- nrow(ATC_lv1[,c(i,j)][ATC_lv1[,i]==1 & ATC_lv1[,j]==1,])
}
}; rm(i, j)
# Reshape data matrix to unrolled data, to feed heatmap
heatmap.data <- reshape2::melt(as.matrix(heatmap.matrix))
colnames(heatmap.data) <- c("x", "y", "value")
# Change co-occurance counts to to percentages
heatmap.data$value <- round(heatmap.data$value / nrow(ATC_lv1), digits = 3)
# Plot graphic
ggplot(heatmap.data, aes(x, y, fill = value)) +
geom_tile(color = "grey95",
lwd = 1,
linetype = 1) +
theme(axis.text.x = element_text(angle = 90),
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
ggtitle(label = "Relative co-frequencies in 1st-level ATC drug groups",
subtitle = "Expressed as % of total patients (n = 2688 patients)") +
geom_text(aes(label = sprintf("%0.3f", value)), color = "gray30", size = 3) +
scale_fill_gradient(low = "lightsteelblue1", high = "darkolivegreen3") +
guides(fill = guide_colourbar(barwidth = 0.5,
barheight = 20,
title = "%"))
Each cell in this map presents the relative co-frequency of patients using 2 drug groups. That is, cell “ATC.A x ATC.C”, figuring 0.360, indicates that 36% of the totality of patients (n=2688) uses simultaneously drugs of ATC groups A and C.
The diagonal line - eg. “ATC.C x ATC.C”, figuring 0.412 - can be seen as a relative frequency of the use of that ATC drug group alone. In this case, a 41.2% of the totality of patients in the dataset are medicated with drugs from ATC group C.
As main findings:
ATC.B is the single most prevalent ATC drug family, being used by 89.1% of patients.
ATC.A + ATC.B is the most prevalent drug family dual association, being jointly used by 66.6% of patients.
groups <- colnames(heatmap.matrix)
group.combinations <- combn(groups, 2) %>% as.data.frame()
cofrequencies <-
sapply(1:ncol(group.combinations),
FUN = function(n){
heatmap.data %>%
filter(x == group.combinations[1,n],
y == group.combinations[2,n]) %>%
select(value)
})
group.combinations <-
cbind(t(group.combinations), cofrequencies) %>%
as.data.frame() %>%
lapply(., as.character) %>%
as.data.frame()
group.combinations$cofrequencies <- as.numeric(group.combinations$cofrequencies)
group.combinations %>% arrange(desc(cofrequencies)) %>% head(10) %>% print
## V1 V2 cofrequencies
## 1 ATC.A ATC.B 0.666
## 2 ATC.B ATC.N 0.556
## 3 ATC.B ATC.J 0.524
## 4 ATC.A ATC.N 0.507
## 5 ATC.B ATC.R 0.495
## 6 ATC.A ATC.J 0.464
## 7 ATC.J ATC.N 0.440
## 8 ATC.B ATC.C 0.400
## 9 ATC.A ATC.R 0.396
## 10 ATC.A ATC.C 0.360
The most frequent dual combinations are the following, all of which occurring in more than 50% of patients:
ATC.A + ATC.B : 66,6 %
ATC.B + ATC.N : 55,6 %
ATC.B + ATC.J : 52,4 %
ATC.A + ATC.N : 50,7 %
heatmap_2groups <- function(index1 = NA,
index2 = NA) {
#' This function plots a heatmap of 2 ATC groups at 2nd level
#' inputs: the indexes on the 2nd levels codes for each group
#' ATC.A: 362:377 (16 codes)
#' ATC.B: 391:395 (5 codes)
#' ATC.J: 425:430 (6 codes)
#' ATC.N: 441:447 (7 codes)
ATC_group1 <- BD_chusj[, index1]
ATC_group2 <- BD_chusj[, index2]
# Create an empty dataset
heatmap.matrix <- data.frame(matrix(NA,
ncol = ncol(ATC_group1),
nrow = ncol(ATC_group2)))
colnames(heatmap.matrix) <- names(ATC_group1)
rownames(heatmap.matrix) <- names(ATC_group2)
ATC_groups12 <- cbind(ATC_group1, ATC_group2)
for (g1 in seq_along(ATC_group1)) {
for (g2 in seq_along(ATC_group2)) {
heatmap.matrix[g2, g1] <-
nrow(ATC_groups12[, c(g1, g2 + ncol(ATC_group1))][ATC_groups12[,g1]==1 &
ATC_groups12[,g2+ncol(ATC_group1)]==1,])
}
}
heatmap.data <- reshape2::melt(as.matrix(heatmap.matrix))
colnames(heatmap.data) <- c("x", "y", "value")
# change values to percentage
heatmap.data$value <- round(heatmap.data$value / nrow(ATC_lv1), digits = 3)
family.group1 <- substring(names(ATC_group1)[1], 5, 5)
family.group2 <- substring(names(ATC_group2)[1], 5, 5)
p <-
ggplot(heatmap.data, aes(x, y, fill = value)) +
geom_tile(color = "grey95",
lwd = 1,
linetype = 1) +
theme(axis.text.x = element_text(angle = 0),
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
ggtitle(label = paste("Relative co-frequences in 2nd-level ATC drug groups",
family.group1, "and", family.group2),
subtitle = "Expressed as % of total patients (n = 2688)") +
geom_text(aes(label = sprintf("%0.3f", value)), color = "gray30", size = 3) +
scale_fill_gradient(low = "lightsteelblue1", high = "darkolivegreen3") +
# coord_fixed() +
guides(fill = guide_colourbar(barwidth = 0.5,
barheight = 20,
title = "%"))
# Find most prevalent combinations
heatmap.data %>%
arrange(desc(value)) %>%
mutate(perc = round(value / nrow(BD_chusj), 3)) %>%
relocate(y) %>%
rename(!!family.group1 := y, !!family.group2 := x, freq = value) %>%
head() %>% print()
return(p)
}
heatmap_2groups(index1 = 362:377, index2 = 391:395) # A.B
## A B freq perc
## 1 ATC.A02 ATC.B01 0.333 0
## 2 ATC.A02 ATC.B05 0.316 0
## 3 ATC.A03 ATC.B01 0.310 0
## 4 ATC.A10 ATC.B01 0.302 0
## 5 ATC.A03 ATC.B05 0.286 0
## 6 ATC.A10 ATC.B05 0.255 0
heatmap_2groups(index1 = 391:395, index2 = 441:447) # B.N
## B N freq perc
## 1 ATC.B01 ATC.N02 0.459 0
## 2 ATC.B05 ATC.N02 0.415 0
## 3 ATC.B01 ATC.N05 0.247 0
## 4 ATC.B05 ATC.N05 0.239 0
## 5 ATC.B01 ATC.N01 0.179 0
## 6 ATC.B05 ATC.N01 0.166 0
heatmap_2groups(index1 = 391:395, index2 = 425:430) # B.J
## B J freq perc
## 1 ATC.B01 ATC.J01 0.481 0
## 2 ATC.B05 ATC.J01 0.413 0
## 3 ATC.B03 ATC.J01 0.076 0
## 4 ATC.B02 ATC.J01 0.035 0
## 5 ATC.B01 ATC.J05 0.026 0
## 6 ATC.B01 ATC.J02 0.025 0
heatmap_2groups(index1 = 362:377, index2 = 441:447) # A.N
## A N freq perc
## 1 ATC.A03 ATC.N02 0.309 0
## 2 ATC.A02 ATC.N02 0.301 0
## 3 ATC.A10 ATC.N02 0.217 0
## 4 ATC.A02 ATC.N05 0.184 0
## 5 ATC.A03 ATC.N05 0.180 0
## 6 ATC.A02 ATC.N01 0.157 0